#####################################
## "Backyard politics in Foreign Aid"
## William Christiansen
## Tobias Heinrich
## Timothy Peterson
#####################################

## Generates the data files used on
## the survey experiment website

## Make list of large cities
############################
## Read the city table and turn its text columns into usable values:
## numeric coordinates, numeric population, and cleaned city names.
cities <- read.csv("data/Cities-Wikipedia.csv", stringsAsFactors = FALSE)

## Each Location string is split at "N" into a latitude part and a
## longitude part. Removing "W", "°" and blanks leaves the bare numbers,
## so the stored longitude is the unsigned value.
tmp <- str_split(cities$Location, pattern = "N")
tmp1 <- vapply(
  tmp,
  function(parts) {
    ## More than two pieces means the string is not "<lat>N <lon>W".
    if (length(parts) > 2) {
      stop("Unexpected Location format: ", paste(parts, collapse = "N"),
           call. = FALSE)
    }
    ## A missing second piece is padded with NA instead of being recycled
    ## from the first one.
    as.numeric(gsub("W|°| ", "", parts))[1:2]
  },
  numeric(2)
)
## tmp1 has one column per city: row 1 = latitude, row 2 = longitude.
cities$Longitude <- tmp1[2, ]
cities$Latitude <- tmp1[1, ]
cities$Location <- NULL

colnames(cities)[1:3] <- c("City", "statename", "Population")

## Population is stripped of comma separators before conversion.
cities$Population <- as.numeric(
  str_replace_all(string = cities$Population, pattern = ",",
                  replacement = "")
)

## Remove digits and square brackets from the city names
## (presumably footnote markers from the source table -- confirm).
cities$City <- gsub("\\d|\\[|\\]", "", cities$City)
## Per state, keep the most populous city plus every city with more than
## 250,000 inhabitants.
u_states <- sort(unique(cities$statename))

## Collect one data frame per state and bind them once at the end rather
## than growing the result inside the loop.
u_cities <- vector("list", length(u_states))
for (i in seq_along(u_states)) {
  tmp <- subset(cities, statename == u_states[i])
  to_keep <- unique(c(which.max(tmp$Population),
                      which(tmp$Population > 250000)))

  ## Longitude is negated here; this assumes every city lies in the
  ## western hemisphere and that the stored value is unsigned -- confirm.
  u_cities[[i]] <- data.frame(statename = tmp$statename[1],
                              City = tmp$City[to_keep],
                              Longitude = -1 * tmp$Longitude[to_keep],
                              Latitude = tmp$Latitude[to_keep])
}
u_cities <- do.call(rbind, u_cities)

## Exclude the District of Columbia and North Las Vegas.
u_cities <- subset(u_cities,
                   statename != "District of Columbia" &
                     City != "North Las Vegas")

## Insert abbreviations
## Attach the state code to every city by matching on the state name;
## cities without a match are kept (left join) with an NA code.
states <- read.csv("data/State abbreviations.csv", stringsAsFactors = FALSE)
colnames(states) <- c("statename", "state")
u_cities <- merge(u_cities, states, by = "statename", all.x = TRUE)
## Rows spelled "Hawai'i" get their code set by hand (presumably because
## this spelling has no match in the abbreviation table -- confirm).
u_cities$state[u_cities$statename == "Hawai'i"] <- "HI"


## Make dataset with nearest city to every zip code
###################################################
## Look at contracts for FY2015 from USAID
## Load the contract records, keep only the four fields used below, drop
## rows with a missing value in any of them, and restrict to vendors
## whose country code is "UNITED STATES".
usaid <- na.omit(
  read.csv("data/FY2015 USAID contracts.csv")[
    , c("vendorname", "vendor_state_code",
        "vendorcountrycode", "dollarsobligated")
  ]
)
usaid <- usaid[usaid$vendorcountrycode == "UNITED STATES", ]

## Numbers/Volumes
##################
## Per vendor state: number of contracts with a positive obligation and
## their summed dollar volume.
contracts <- ddply(
  .data = subset(usaid, dollarsobligated > 0),
  .variables = "vendor_state_code",
  .fun = function(x) {
    data.frame(Number = nrow(x), Volume = sum(x$dollarsobligated))
  }
)

## Rank states by contract count and add cumulative shares.
contracts <- contracts[order(contracts$Number, decreasing = TRUE), ]
contracts$Order <- seq_len(nrow(contracts))
contracts$NumberCS <- cumsum(contracts$Number) / sum(contracts$Number)
contracts$VolumeCS <- cumsum(contracts$Volume) / sum(contracts$Volume)

## Ranks 2 through 10 by contract count; the top-ranked code is skipped.
## NOTE(review): presumably rank 1 is DC, which has no entry in the city
## list -- confirm. This yields nine states, not ten.
top_states <- as.character(contracts$vendor_state_code[2:10])


## Make table with Zip codes
############################
## Get/prep zipcodes
data(zipcode)
zips <- zipcode[c("zip", "state", "longitude", "latitude")]
names(zips)[3:4] <- c("zip_lon", "zip_lat")

## Placeholder columns for the nearest city and its coordinates.
zips[["ncity"]] <- ""
zips[c("ncity_lon", "ncity_lat")] <- NA

## Keep only zip codes located in the selected states.
zips <- zips[zips$state %in% top_states, ]

## Pick nearest city within state
## Create the distance column up front so each assignment in the loop
## writes exactly one element (an uninitialised column would have the
## first value recycled over every row).
zips$zip2ncity_dist <- rep(NA_real_, nrow(zips))

for (i in seq_len(nrow(zips))) {
  ## Candidate cities are those in the same state as the zip code.
  tmp <- subset(u_cities, state == zips$state[i])
  if (nrow(tmp) == 0) {
    stop("No candidate city for state ", zips$state[i],
         " (zip ", zips$zip[i], ")", call. = FALSE)
  }

  ## Distances in miles (distHaversine gives metres with its default
  ## radius; / 1000 -> km, * 0.621 -> miles)
  tmp$zip2ncity_dist <- distHaversine(
    p1 = tmp[, c("Longitude", "Latitude")],
    p2 = zips[i, c("zip_lon", "zip_lat")]
  ) / 1000 * 0.621

  if (nrow(tmp) > 1) {
    ## which.min() picks a single row even when distances tie, and
    ## returns nothing when every distance is NA.
    tmp <- tmp[which.min(tmp$zip2ncity_dist), ]
    if (nrow(tmp) != 1) {
      stop("No usable distance for zip ", zips$zip[i],
           " (missing coordinates?)", call. = FALSE)
    }
  }

  zips$ncity[i] <- as.character(tmp$City)
  zips$ncity_lon[i] <- tmp$Longitude
  zips$ncity_lat[i] <- tmp$Latitude
  zips$zip2ncity_dist[i] <- tmp$zip2ncity_dist
}


## Senators
###########
## Read the raw senator table as plain strings (matching the other CSV
## reads in this script) so the value overrides below are ordinary string
## assignments. The first data row is dropped.
## NOTE(review): presumably that row is not a real record -- confirm.
senators <- read.csv("data/Senators raw.csv", header = TRUE,
                     stringsAsFactors = FALSE)[-1, ]
colnames(senators) <- c("statename", "Senator_fullname", "Senator_party")

## Bernie Sanders is recorded as "Democratic".
senators$Senator_party[senators$Senator_fullname == "Bernie Sanders"] <- "Democratic"

## Attach the state code by state name (left join).
colnames(states) <- c("statename", "state")
senators <- merge(senators, states, by = "statename", all.x = TRUE)

## Extract last name
## Everything after the first blank counts as the last name; a name
## without a blank yields NA.
tmp <- str_split(senators$Senator_fullname, pattern = " ", n = 2)
senators$Senator_lastname <- vapply(tmp, function(parts) parts[2],
                                    character(1))
## The rule above would give "Moore Capito"; set the last name by hand.
senators$Senator_lastname[senators$Senator_fullname == "Shelley Moore Capito"] <- "Capito"

## Prep for use in website vignette
## Collapse to one row per state: senator "A" is the first row of the
## state's group, senator "B" the second.
senators <- ddply(
  .data = senators,
  .variables = "statename",
  .fun = function(x) {
    ## Display label "<full name> (<R|D>-<state code>)" for the k-th
    ## senator of the group; any party other than "Republican" is
    ## rendered as "D".
    make_label <- function(k) {
      paste0(
        x$Senator_fullname[k],
        " (", ifelse(x$Senator_party[k] == "Republican", "R", "D"),
        "-", x$state[k], ")"
      )
    }

    data.frame(
      state = x$state[1],
      A_id1 = make_label(1),
      A_id4 = x$Senator_lastname[1],
      A_party = x$Senator_party[1],
      id0 = x$state[1],
      B_id1 = make_label(2),
      B_id4 = x$Senator_lastname[2],
      B_party = x$Senator_party[2]
    )
  }
)
write.csv(x = senators, file = "output/Senators.csv")

## Senators/zip
## Add each state's senator fields to every zip-code row; zip rows with
## no matching state in the senator table are kept (left join).
zips_ncity_senator <- merge(zips, senators, by = "state", all.x = TRUE)

## Save
#######
write.csv(zips_ncity_senator, file = "output/ZIP_TOP10.csv")

## Remove this script's working objects from the workspace.
rm(u_cities, states, tmp, to_keep, tmp1, u_states, i, cities, senators, zips_ncity_senator,
   zips, contracts, top_states, usaid)




